{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copyright (c) MONAI Consortium  \n",
    "Licensed under the Apache License, Version 2.0 (the \"License\");  \n",
    "you may not use this file except in compliance with the License.  \n",
    "You may obtain a copy of the License at  \n",
    "&nbsp;&nbsp;&nbsp;&nbsp;http://www.apache.org/licenses/LICENSE-2.0  \n",
    "Unless required by applicable law or agreed to in writing, software  \n",
    "distributed under the License is distributed on an \"AS IS\" BASIS,  \n",
    "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  \n",
    "See the License for the specific language governing permissions and  \n",
    "limitations under the License.\n",
    "\n",
    "# Data loading pipeline examples\n",
    "\n",
    "The purpose of this notebook is to illustrate reading Nifti files and test speed of different methods.\n",
    "\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/main/acceleration/transform_speed.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!python -c \"import monai\" || pip install -q \"monai-weekly[nibabel]\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import glob\n",
    "import os\n",
    "import shutil\n",
    "import tempfile\n",
    "\n",
    "import nibabel as nib\n",
    "import numpy as np\n",
    "import torch\n",
    "\n",
    "try:\n",
    "    torch.multiprocessing.set_start_method(\"spawn\")\n",
    "except RuntimeError:\n",
    "    pass\n",
    "\n",
    "\n",
    "from monai.config import print_config\n",
    "from monai.data import ArrayDataset, create_test_image_3d\n",
    "from monai.transforms import (\n",
    "    EnsureChannelFirst,\n",
    "    Compose,\n",
    "    LoadImage,\n",
    "    RandAffine,\n",
    "    RandSpatialCrop,\n",
    "    Rotate,\n",
    "    ScaleIntensity,\n",
    ")\n",
    "from monai.utils import first\n",
    "\n",
    "print_config()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup data directory\n",
    "\n",
    "You can specify a directory with the `MONAI_DATA_DIRECTORY` environment variable.  \n",
    "This allows you to save results and reuse downloads.  \n",
    "If not specified a temporary directory will be used."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/tmp/tmpvaqesd_z\n"
     ]
    }
   ],
   "source": [
    "directory = os.environ.get(\"MONAI_DATA_DIRECTORY\")\n",
    "if directory:\n",
    "    directory = os.path.join(directory, \"transform_speed\")\n",
    "    os.makedirs(directory, exist_ok=True)\n",
    "root_dir = tempfile.mkdtemp() if directory is None else directory\n",
    "print(root_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 0. Preparing input data (nifti images)\n",
    "\n",
    "Create a number of test Nifti files, 3d single channel images with spatial size (256, 256, 256) voxels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(5):\n",
    "    im, seg = create_test_image_3d(256, 256, 256)\n",
    "\n",
    "    n = nib.Nifti1Image(im, np.eye(4))\n",
    "    nib.save(n, os.path.join(root_dir, f\"im{i}.nii.gz\"))\n",
    "\n",
    "    n = nib.Nifti1Image(seg, np.eye(4))\n",
    "    nib.save(n, os.path.join(root_dir, f\"seg{i}.nii.gz\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# prepare list of image names and segmentation names\n",
    "images = sorted(glob.glob(os.path.join(root_dir, \"im*.nii.gz\")))\n",
    "segs = sorted(glob.glob(os.path.join(root_dir, \"seg*.nii.gz\")))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Test image loading with minimal preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3, 1, 256, 256, 256) (3, 1, 256, 256, 256)\n"
     ]
    }
   ],
   "source": [
    "imtrans = Compose([LoadImage(image_only=True), EnsureChannelFirst()])\n",
    "\n",
    "segtrans = Compose([LoadImage(image_only=True), EnsureChannelFirst()])\n",
    "\n",
    "ds = ArrayDataset(images, imtrans, segs, segtrans)\n",
    "loader = torch.utils.data.DataLoader(ds, batch_size=3, num_workers=8)\n",
    "\n",
    "im, seg = first(loader)\n",
    "print(im.shape, seg.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 26.1 ms, sys: 172 ms, total: 198 ms\n",
      "Wall time: 11 s\n"
     ]
    }
   ],
   "source": [
    "%time data = next(iter(loader))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Test image-patch loading with CPU multi-processing:\n",
    "\n",
    "- rotate (256, 256, 256)-voxel in the plane axes=(1, 2)\n",
    "- extract random (64, 64, 64) patches\n",
    "- implemented in MONAI using ` scipy.ndimage.rotate`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3, 1, 64, 64, 64) (3, 1, 64, 64, 64)\n"
     ]
    }
   ],
   "source": [
    "images = sorted(glob.glob(os.path.join(root_dir, \"im*.nii.gz\")))\n",
    "segs = sorted(glob.glob(os.path.join(root_dir, \"seg*.nii.gz\")))\n",
    "\n",
    "imtrans = Compose(\n",
    "    [\n",
    "        LoadImage(image_only=True),\n",
    "        ScaleIntensity(),\n",
    "        EnsureChannelFirst(),\n",
    "        Rotate(angle=np.pi / 4),\n",
    "        RandSpatialCrop((64, 64, 64), random_size=False),\n",
    "    ]\n",
    ")\n",
    "\n",
    "segtrans = Compose(\n",
    "    [\n",
    "        LoadImage(image_only=True),\n",
    "        EnsureChannelFirst(),\n",
    "        Rotate(angle=np.pi / 4),\n",
    "        RandSpatialCrop((64, 64, 64), random_size=False),\n",
    "    ]\n",
    ")\n",
    "\n",
    "ds = ArrayDataset(images, imtrans, segs, segtrans)\n",
    "loader = torch.utils.data.DataLoader(ds, batch_size=3, num_workers=8, pin_memory=torch.cuda.is_available())\n",
    "\n",
    "im, seg = first(loader)\n",
    "print(im.shape, seg.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 37.6 ms, sys: 498 ms, total: 536 ms\n",
      "Wall time: 24.6 s\n"
     ]
    }
   ],
   "source": [
    "%time data = next(iter(loader))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(the above results were based on Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Test image-patch loading with preprocessing on GPU:\n",
    "\n",
    "- random rotate (256, 256, 256)-voxel in the plane axes=(1, 2)\n",
    "- extract random (64, 64, 64) patches\n",
    "- implemented in MONAI using native pytorch resampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3, 1, 64, 64, 64) (3, 1, 64, 64, 64)\n"
     ]
    }
   ],
   "source": [
    "images = sorted(glob.glob(os.path.join(root_dir, \"im*.nii.gz\")))\n",
    "segs = sorted(glob.glob(os.path.join(root_dir, \"seg*.nii.gz\")))\n",
    "\n",
    "# same parameter with different interpolation mode for image and segmentation\n",
    "rand_affine_img = RandAffine(\n",
    "    prob=1.0,\n",
    "    rotate_range=np.pi / 4,\n",
    "    translate_range=(96, 96, 96),\n",
    "    spatial_size=(64, 64, 64),\n",
    "    mode=\"bilinear\",\n",
    "    device=torch.device(\"cuda:0\"),\n",
    ")\n",
    "rand_affine_seg = RandAffine(\n",
    "    prob=1.0,\n",
    "    rotate_range=np.pi / 4,\n",
    "    translate_range=(96, 96, 96),\n",
    "    spatial_size=(64, 64, 64),\n",
    "    mode=\"nearest\",\n",
    "    device=torch.device(\"cuda:0\"),\n",
    ")\n",
    "\n",
    "imtrans = Compose([LoadImage(image_only=True), ScaleIntensity(), EnsureChannelFirst(), rand_affine_img])\n",
    "\n",
    "segtrans = Compose([LoadImage(image_only=True), EnsureChannelFirst(), rand_affine_seg])\n",
    "\n",
    "ds = ArrayDataset(images, imtrans, segs, segtrans)\n",
    "loader = torch.utils.data.DataLoader(ds, batch_size=3, num_workers=0)\n",
    "\n",
    "im, seg = first(loader)\n",
    "\n",
    "print(im.shape, seg.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 19.4 s, sys: 2.67 s, total: 22 s\n",
      "Wall time: 4.83 s\n"
     ]
    }
   ],
   "source": [
    "%time data = next(iter(loader))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tesla V100-SXM2-16GB-N\n",
      "|===========================================================================|\n",
      "|                  PyTorch CUDA memory summary, device ID 0                 |\n",
      "|---------------------------------------------------------------------------|\n",
      "|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |\n",
      "|===========================================================================|\n",
      "|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |\n",
      "|---------------------------------------------------------------------------|\n",
      "| Allocated memory      |   16387 KB |   24580 KB |  118883 KB |  102496 KB |\n",
      "|---------------------------------------------------------------------------|\n",
      "| Active memory         |   16387 KB |   24580 KB |  118883 KB |  102496 KB |\n",
      "|---------------------------------------------------------------------------|\n",
      "| GPU reserved memory   |   43008 KB |   43008 KB |   43008 KB |       0 B  |\n",
      "|---------------------------------------------------------------------------|\n",
      "| Non-releasable memory |    6141 KB |   22527 KB |  274525 KB |  268384 KB |\n",
      "|---------------------------------------------------------------------------|\n",
      "| Allocations           |       8    |      15    |     226    |     218    |\n",
      "|---------------------------------------------------------------------------|\n",
      "| Active allocs         |       8    |      15    |     226    |     218    |\n",
      "|---------------------------------------------------------------------------|\n",
      "| GPU reserved segments |       3    |       3    |       3    |       0    |\n",
      "|---------------------------------------------------------------------------|\n",
      "| Non-releasable allocs |       5    |       7    |     165    |     160    |\n",
      "|===========================================================================|\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(torch.cuda.get_device_name(0))\n",
    "print(torch.cuda.memory_summary(0, abbreviated=True))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4. Test image-patch loading with preprocessing on GPU using the Cupy backend:\n",
    "\n",
    "In the cupy package is installed correctly along with MONAI, \n",
    "setting the `mode` to an integer in `[0-5]` and `device` to a cuda device will enable the cupy backend resampling.\n",
    "\n",
    "- random rotate (256, 256, 256)-voxel in the plane axes=(1, 2)\n",
    "- extract random (64, 64, 64) patches\n",
    "- implemented in MONAI using the cupy backend for high-order spline interpolation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3, 1, 64, 64, 64) (3, 1, 64, 64, 64)\n"
     ]
    }
   ],
   "source": [
    "images = sorted(glob.glob(os.path.join(root_dir, \"im*.nii.gz\")))\n",
    "segs = sorted(glob.glob(os.path.join(root_dir, \"seg*.nii.gz\")))\n",
    "\n",
    "# same parameter with different interpolation mode for image and segmentation\n",
    "rand_affine_img = RandAffine(\n",
    "    prob=1.0,\n",
    "    rotate_range=np.pi / 4,\n",
    "    translate_range=(96, 96, 96),\n",
    "    spatial_size=(64, 64, 64),\n",
    "    mode=3,\n",
    "    padding_mode=\"reflect\",\n",
    "    device=torch.device(\"cuda:0\"),\n",
    ")\n",
    "rand_affine_seg = RandAffine(\n",
    "    prob=1.0,\n",
    "    rotate_range=np.pi / 4,\n",
    "    translate_range=(96, 96, 96),\n",
    "    spatial_size=(64, 64, 64),\n",
    "    mode=0,\n",
    "    padding_mode=\"reflect\",\n",
    "    device=torch.device(\"cuda:0\"),\n",
    ")\n",
    "\n",
    "imtrans = Compose([LoadImage(image_only=True), ScaleIntensity(), EnsureChannelFirst(), rand_affine_img])\n",
    "\n",
    "segtrans = Compose([LoadImage(image_only=True), EnsureChannelFirst(), rand_affine_seg])\n",
    "\n",
    "ds = ArrayDataset(images, imtrans, segs, segtrans)\n",
    "loader = torch.utils.data.DataLoader(ds, batch_size=3, num_workers=0)\n",
    "\n",
    "im, seg = first(loader)\n",
    "\n",
    "print(im.shape, seg.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 15.4 s, sys: 3.15 s, total: 18.5 s\n",
      "Wall time: 7.7 s\n"
     ]
    }
   ],
   "source": [
    "%time data = next(iter(loader))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cleanup data directory\n",
    "\n",
    "Remove directory if a temporary was used."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "if directory is None:\n",
    "    shutil.rmtree(root_dir)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
